# Import PAM data
um <- read_csv("data/20201014_UM_Acer/PAM/UM_pamdata.csv")
crf <- read_csv("data/20201013_CRF_Acer/PAM/CRF_pamdata.csv")
rrt <- read_csv("data/20201007_RRT_Acer/PAM/RRT_Acer_pamdata.csv")
mote <- read_csv("data/20201011_Mote_Acer/PAM/Mote_pamdata.csv")
fwc <- read_csv("data/20201010_FWC_Acer/PAM/FWC_pamdata.csv")
df <- list(um = um, crf = crf, rrt = rrt, mote = mote, fwc = fwc) %>%
bind_rows(.id = "nursery")
df %>% distinct(nursery, geno) %>% nrow(.)
## [1] 193
# Initial filtering
dff <- df %>%
# Omit rows for one coral that had missing pam data at 32°C
filter(!(geno == "ML1" & nursery == "rrt" & max_temp == 32)) %>%
# Save raw fvfm in new column
mutate(fvfmraw = fvfm) %>%
# Make missing values zero (these were all high temp / no signal)
replace_na(replace = list(fvfm = 0)) %>%
# Identify problematic data points
mutate(problem = case_when(
max_temp >= 36 & fvfm >= 0.4 & f <= 250 ~ "abnormally high w/ low Ft",
fvfm > 0.750 ~ "abnormally high",
TRUE ~ "none")) %>%
mutate(fvfm = case_when(
problem == "abnormally high w/ low Ft" ~ 0, # Change abnormally high values at high temps to zero
problem == "abnormally high" ~ NA_real_, # Change all values > 0.750 to NA
problem == "none" ~ fvfm))
# Combine with KJS data
kjs <- read_csv("data/20200820_KJSNSU_Acer/KJS_pamdata.csv") %>%
mutate(fvfmraw = fvfm,
problem = case_when(
max_temp >= 36 & fvfm >= 0.5 & f <= 150 ~ "abnormally high w/ low Ft",
fvfm > 0.750 ~ "abnormally high",
TRUE ~ "none"),
fvfm = case_when(
problem == "abnormally high w/ low Ft" ~ 0, # Change abnormally high values at high temps to zero
problem == "abnormally high" ~ NA_real_, # Change all values > 0.750 to NA
problem == "none" ~ fvfm))
dff <- bind_rows(dff, kjs)
# Plot all data points
ggplot(dff, aes(x = max_temp, y = fvfm)) +
geom_point(alpha = 0.2)

# Fit 3-parameter LL model to each coral individually (faster than all together) for initial outlier detection
# Define function to fit 3-parameter LL model to data and return NULL if fitting error
ll3 <- function(data) {
drm(fvfm ~ max_temp, data = drop_na(data, fvfm),
fct = LL.3(names = c("hill", "max", "ED50")),
upperl = c(100, 0.67, NA),
lowerl = c(NA, 0.66, NA))}
tryll3 <- possibly(ll3, otherwise = NULL)
# Fit model to each coral, get parameters, fitted values, and residuals
initmods <- dff %>%
nest(data = c(f, fm, fvfm, fvfmraw, tank_id, tank_set, max_temp, problem)) %>%
# Fit the model to each coral
mutate(ll3 = map(data, tryll3)) %>%
# Get model parameters and fitted values/residuals
mutate(pars = map(ll3, tidy),
pred = map2(ll3, data, ~augment(.x, drop_na(.y, fvfm))))
## Error in optim(startVec, opfct, hessian = TRUE, method = "L-BFGS-B", lower = lowerLimits, :
## non-finite finite-difference value [3]
## Error in optim(startVec, opfct, hessian = TRUE, method = "L-BFGS-B", lower = lowerLimits, :
## non-finite finite-difference value [3]
## Error in optim(startVec, opfct, hessian = TRUE, method = "L-BFGS-B", lower = lowerLimits, :
## non-finite finite-difference value [3]
## Error in optim(startVec, opfct, hessian = TRUE, method = "L-BFGS-B", lower = lowerLimits, :
## L-BFGS-B needs finite values of 'fn'
pars <- initmods %>%
select(nursery, geno, pars) %>%
unnest(pars) %>%
filter(term == "ED50")
vals <- initmods %>%
select(nursery, geno, pred) %>%
unnest(pred) %>%
full_join(pars) %>%
full_join(dff) %>%
rename(ed50 = estimate)
# Define function to plot raw data, fitted values, and ed50 for each genotype
plotfits <- function(data) {
ggplot(data = data, aes(x = max_temp)) +
geom_point(aes(y = fvfmraw, color = problem), pch = 4, size = 1.25) +
geom_point(aes(y = fvfm), pch = 1, size = 2) +
geom_line(data = drop_na(data, .fitted),
aes(y = .fitted)) +
geom_vline(data = distinct(data, geno, ed50),
aes(xintercept = ed50),
lwd = 0.2, lty = 2) +
geom_text(data = distinct(data, geno, ed50),
aes(x = ed50, y = 0.05, label = round(ed50, 2)),
hjust = 1, nudge_x = -0.2, size = 3) +
facet_wrap(~ geno) +
scale_color_manual(values = c("black", "red", "orange", "blue"), drop = FALSE)
}
# Plot fits for each nursery
# plotfits(data = filter(vals, nursery == "rrt"))
# plotfits(data = filter(vals, nursery == "fwc"))
# plotfits(data = filter(vals, nursery == "mote"))
# plotfits(data = filter(vals, nursery == "crf"))
# plotfits(data = filter(vals, nursery == "um"))
# plotfits(data = filter(vals, nursery == "kjs"))
# Remove outlier datapoints
dfff <- vals %>%
mutate(problem = case_when(.resid > 0.15 ~ "high residual from fitted", TRUE ~ problem)) %>%
mutate(problem = factor(problem, levels = c("none", "abnormally high", "abnormally high w/ low Ft",
"high residual from fitted"))) %>%
mutate(fvfm = case_when(problem == "high residual from fitted" ~ NA_real_, TRUE ~ fvfm)) %>%
select(nursery, geno, f, fm, fvfm, fvfmraw, tank_id, tank_set, max_temp, problem)
# Refit models individually without outliers
# Fit model to each coral, get parameters, fitted values, and residuals
fmods <- dfff %>%
nest(data = c(f, fm, fvfm, fvfmraw, tank_id, tank_set, max_temp, problem)) %>%
# Fit the model to each coral
mutate(ll3 = map(data, tryll3)) %>%
# Get model parameters and fitted values/residuals
mutate(pars = map(ll3, tidy),
pred = map2(ll3, data, ~augment(.x, drop_na(.y, fvfm))))
## Error in optim(startVec, opfct, hessian = TRUE, method = "L-BFGS-B", lower = lowerLimits, :
## non-finite finite-difference value [3]
## Error in optim(startVec, opfct, hessian = TRUE, method = "L-BFGS-B", lower = lowerLimits, :
## non-finite finite-difference value [3]
## Error in optim(startVec, opfct, hessian = TRUE, method = "L-BFGS-B", lower = lowerLimits, :
## non-finite finite-difference value [3]
## Error in optim(startVec, opfct, hessian = TRUE, method = "L-BFGS-B", lower = lowerLimits, :
## L-BFGS-B needs finite values of 'fn'
fpars <- fmods %>%
select(nursery, geno, pars) %>%
unnest(pars) %>%
filter(term == "ED50")
fvals <- fmods %>%
select(nursery, geno, pred) %>%
unnest(pred) %>%
full_join(fpars) %>%
full_join(dfff) %>%
rename(ed50 = estimate)
# plotfits(data = filter(fvals, nursery == "rrt"))
# plotfits(data = filter(fvals, nursery == "fwc"))
# plotfits(data = filter(fvals, nursery == "mote"))
# plotfits(data = filter(fvals, nursery == "crf"))
# plotfits(data = filter(fvals, nursery == "um"))
# plotfits(data = filter(fvals, nursery == "kjs"))
# fvals %>%
# mutate(genoN = paste0("g", group_indices(fvals, nursery, geno))) %>%
# ggplot(aes(x = fct_reorder(genoN, ed50), y = ed50)) +
# geom_point() +
# geom_errorbar(aes(ymin = ed50 - std.error, ymax = ed50 + std.error)) +
# facet_grid(~ nursery, scales = "free_x") #+
# #scale_y_continuous(limits = c(34.5, 38))
# Refit genos all together without outliers, and fixed maximum across genos
dfff$genoN <- factor(paste0("g", group_indices(dfff, nursery, geno)))
# m3f <- drm(fvfm ~ max_temp, genoN, data = dfff,
# fct = LL.3(names = c('hill', 'max', 'ED50')),
# lowerl = c(20, NA, NA),
# upperl = c(100, NA, NA), # upper limit of 100 on hill parameter
# pmodels = data.frame(genoN, 1, genoN)) # all genos have fixed maximum
#
# save(m3f, file = "data/ll3modf.RData")
load("data/ll3modf.RData")
# Tidy model output and join ED50 estimates with sample metadata
res3f <- tidy(m3f) %>%
filter(term == "ED50") %>%
mutate(genoN = curve) %>%
left_join(distinct(dfff, nursery, geno, genoN)) %>%
mutate(genoN = factor(genoN, levels = genoN[order(estimate)]))
# Get fitted/predicted values to look at fits for each genotype
pred3f <- augment(m3f, data.frame(m3f$data)) %>%
mutate(genoN = genoN.1) %>%
left_join(select(res3f, geno, genoN, nursery, ed50 = estimate)) %>% # add ed50 value for each genotype
full_join(dfff) %>%
mutate(problem = factor(problem, levels = levels(dfff$problem))) %>%
mutate(genoN = factor(genoN, levels = levels(res3f$genoN)))
# Plot for each nursery
plotfits(data = filter(pred3f, nursery == "rrt"))

plotfits(data = filter(pred3f, nursery == "fwc"))

plotfits(data = filter(pred3f, nursery == "mote"))

plotfits(data = filter(pred3f, nursery == "crf"))

plotfits(data = filter(pred3f, nursery == "um"))

plotfits(data = filter(pred3f, nursery == "kjs"))

# Plot ED50 estimates for all genotypes
res3f %>%
ggplot(aes(x = genoN, y = estimate)) +
geom_point() +
geom_errorbar(aes(ymin = estimate - std.error, ymax = estimate + std.error)) +
facet_grid(~ nursery, scales = "free_x") +
scale_x_discrete(labels = res3f$geno[order(res3f$genoN)])

res3f %>%
select(nursery, geno, ed50 = estimate, std.error, genoN) %>%
write_csv(path = "data/ed50_values.csv")
pred3f %>%
select(nursery, geno, genoN, max_temp, fvfmraw, fvfm, .fitted, ed50) %>%
write_csv(path = "data/pam_tidy.csv")